knitr::opts_chunk$set(echo = TRUE) # uncomment lines below to install packages # install.packages("survminer", repos = "http://cran.us.r-project.org") # install.packages("Rtsne", repos = "http://cran.us.r-project.org") # install.packages("devtools", repos = "http://cran.us.r-project.org") # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("cytoMEM") # install.packages("tidyverse", repos = "http://cran.us.r-project.org") # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("flowCore") # BiocManager::install("FlowSOM") # BiocManager::install("Biobase") # install.packages("ggpubr", repos = "http://cran.us.r-project.org") # uncomment line below to install RAPID # devtools::install_github("cytolab/RAPID") # load packages into the working library library(devtools) library(cytoMEM) library(RAPID) library(tidyverse) library(ggpubr) library(flowCore) library(Biobase) library(FlowSOM) library(survival) library(survminer) library(plyr) library(Rtsne) library(cowplot) library(RColorBrewer) # set working directory setwd(paste(getwd(),"/data_files/gbm_dataset/", sep = "")) # set output file name tag output_filename = "_RAPID" # read data into R data.set <- dir(pattern="*.fcs") data <- lapply(lapply(data.set,read.FCS),exprs) combined.patient.data = as.data.frame(do.call(rbind, mapply(cbind, data, "FILE_ID"= c(1:length(data)), SIMPLIFY=F))) names <- colnames(combined.patient.data) colnames(combined.patient.data) =c((read.FCS(data.set[[1]])@parameters@data[["desc"]]),"FILE_ID") # to see patient order, uncomment and run line below # data.set # create varible for survival or clinical data OS.data.set <- dir(pattern="*.csv") OS.data = read.csv(OS.data.set)
# selecting t-SNE columns from data set tsne.data <- combined.patient.data %>% select(ends_with('_2')) # transform other data transformed.data <- combined.patient.data %>% select(-contains('FILE_ID')) %>% mutate_all(function(x) asinh(x/5)) # choose markers to use for variance calculation and transform chosen.markers <- transformed.data %>% select(contains('(v2)')) # create variable for markers to use in MEM MEMdata = cbind(chosen.markers,transformed.data$`CycinB1-139`)
setwd(paste(getwd(),"/data_files/gbm_dataset/", sep = "")) dir.create("./output files/") range <- apply(apply(tsne.data, 2, range), 2, diff) graphical.ratio <- (range[1] / range[2]) # UMAP flat dot plot and density dot plot (1 dot = 1 cell) tsne.plot <- data.frame(x = tsne.data[, 2], y = tsne.data[, 1]) # density dot plot ggplot(tsne.plot, aes(x = x, y = y)) + coord_fixed(ratio = graphical.ratio)+ geom_point(size = 0.5) +geom_density_2d_filled(bins = 29) +scale_fill_manual(values = c("NA","NA","NA","NA","NA","NA","NA",viridis::viridis(24,option = "A"))) + labs(x = "t-SNE 1", y = "t-SNE 2", title = "Density on t-SNE Axes") + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+theme(legend.position = "none") + labs(caption = "Data from Sinnaeve et al., eLife 2020") tsne.by.marker<-as_tibble(tsne.data) %>% bind_cols(MEMdata) %>% gather(channel, intensity, -tSNE1_2, -tSNE2_2) %>% mutate(across(channel,factor))%>% group_split(channel) %>% map( ~ggplot(.,aes(x= tSNE2_2, y= tSNE1_2, col = intensity)) + geom_point(size = 2) + scale_color_gradientn( colours = colorRampPalette(rev(brewer.pal(n = 11, name = "Spectral")))(5))+ facet_grid(~ channel, labeller = function(x) label_value(x, multi_line = FALSE)) + coord_fixed() + theme_bw()+ theme(strip.text.x = element_text(size = 20),legend.title=element_blank()))%>% plot_grid(plotlist = ., align = 'hv', ncol = 8) png(paste("./output files/",strftime(Sys.time(),"%Y-%m-%d_%H%M%S"),"t-SNE on transformed data.png"),height = 2000,width = 4000) print(tsne.by.marker) dev.off()
# find ideal numbers of clusters and create cluster ID data # Here we find 44 clusters, so below is the code to find those 44 clusters mat.input <- as.matrix(tsne.data) metadata <- data.frame(name = dimnames(mat.input)[[2]], desc = dimnames(mat.input)[[2]]) metadata$range <- apply(apply(mat.input, 2, range), 2, diff) metadata$minRange <- apply(mat.input, 2, min) metadata$maxRange <- apply(mat.input, 2, max) input.flowframe <- new("flowFrame", exprs=mat.input,parameters = AnnotatedDataFrame(metadata)) fSOM <- FlowSOM(input.flowframe, scale = TRUE, colsToUse = c(1:2), nClus = 44, seed = 38) optimized.cluster.data <- as.vector(as.numeric(GetMetaclusters(fSOM))) # To run the optimization, uncomment and run the line below # optimized.cluster.data = optimize_FlowSOM_clusters(tsne.data,chosen.markers, N = 50, seed = 38) # plot optimized FlowSOM clusters FlowSOM_clusters_plot <- plot_clusters(x = tsne.data[,2],y = tsne.data[,1], clusters = as.factor(optimized.cluster.data),xlab ="t-SNE 2",ylab = "t-SNE 1",legendtitle = "FlowSOM Cluster",title = "Cancer Cells t-SNE with FlowSOM Clusters") FlowSOM_clusters_plot
# cluster ID in 1st column and patient ID in 2nd column cluster.patient.data = as.data.frame(cbind(optimized.cluster.data,combined.patient.data$FILE_ID)) # calculate survival statistics survival_stats <- subset_survival_analysis(cluster.patient.data, OS.data, clinical_col = 3, status_col = 4) # plot significant survival curves survival_plots <- plot_survival_curves(survival_stats, cluster.patient.data, OS.data, clinical_col = 3, status_col = 4) survival_plots # find significant subsets and assign prognostic value (1 = none, 2 = positive prog., 3 = negative prog.) prognostic_subsets = significant_clusters(survival_stats, cluster_data = optimized.cluster.data) # plot significant subsets RAPID_plot <- plot_sig_clusters(x = tsne.data[,2], y = tsne.data[,1], clusters = optimized.cluster.data, survival_stats,xlab ="t-SNE 2",ylab = "t-SNE 1",legendtitle = "Prognostic Subsets",title = "Cancer Cells t-SNE with Prognostic Populations") RAPID_plot
setwd(paste(getwd(),"/data_files/gbm_dataset/", sep = "")) cluster = optimized.cluster.data MEM.data = cbind(MEMdata,cluster) # use MEM to find marker enrichments for FlowSOM clusters # identity proteins and signaling proteins MEM.values.p = MEM(MEM.data, transform = FALSE, cofactor = 0, choose.markers = FALSE, markers = "1:4,8,11:16,18:21,24",choose.ref = FALSE, zero.ref = FALSE, rename.markers = FALSE, new.marker.names = "TUJ1,CD117,S100B,CD34,NCAM,CD49F,CD133,PDGFRa,SOX2,CD15,EGFR,CD171,Nestin,CD44,GFAP,HLA-DR",file.is.clust = FALSE, add.fileID = FALSE, IQR.thresh = NULL) MEM.values.s = MEM(MEM.data, transform = FALSE, cofactor = 0, choose.markers = FALSE, markers = "5:7,9:10,17,22:23,25",choose.ref = FALSE, zero.ref = FALSE, rename.markers = FALSE, new.marker.names = "p-STAT5,p-AKT,p-STAT1,p-p38,p-STAT3,p-NFkB,p-ERK,p-S6,CyclinB1",file.is.clust = FALSE, add.fileID = FALSE, IQR.thresh = NULL) # build MEM heatmap and output enrichment scores build_heatmaps(MEM.values.p, cluster.MEM = "both", cluster.medians = "none", display.thresh = 1, output.files = TRUE, labels = FALSE, only.MEMheatmap = FALSE) build_heatmaps(MEM.values.s, cluster.MEM = "both", cluster.medians = "none", display.thresh = 0, output.files = TRUE, labels = FALSE, only.MEMheatmap = FALSE)
setwd(paste(getwd(),"/data_files/gbm_dataset/", sep = "")) # create output files folder by uncommenting and running line below dir.create(file.path(getwd(), "output files"), showWarnings = FALSE) # export figures to PDF ggexport(FlowSOM_clusters_plot,RAPID_plot,survival_plots,filename = paste("./output files/",strftime(Sys.time(),"%Y-%m-%d_%H%M%S"),output_filename,".pdf",sep=""), width = 7.2, height = 5.4) combined.patient.data = as.data.frame(do.call(rbind, mapply(cbind, data, "FILE_ID"= c(1:length(data)), SIMPLIFY=F))) data.to.fcs = cbind(combined.patient.data,prognostic_subsets) name<-c(names[1:(ncol(combined.patient.data)-1)],"Cluster","Prog_Status") # export separate.fcs.files = split(data.to.fcs,data.to.fcs$`FILE_ID`) for (i in 1:length(separate.fcs.files)){ reduce.data = subset(separate.fcs.files[[i]], select=-c(`FILE_ID`)) mat.input<- as.matrix(reduce.data) metadata <- data.frame(name = dimnames(mat.input)[[2]], desc = name) metadata$range <- apply(apply(mat.input, 2, range), 2, diff) metadata$minRange <- apply(mat.input, 2, min) metadata$maxRange <- apply(mat.input, 2, max) input.flowframe <- new("flowFrame", exprs=mat.input,parameters = AnnotatedDataFrame(metadata)) newname = str_remove(data.set[i], ".fcs") new.filename = paste0("./output files/",strftime(Sys.time(),"%Y-%m-%d_%H%M%S"),"_",newname,"_RAPID.fcs",sep="") write.FCS(input.flowframe,filename = new.filename) print(paste("FCS file ",i," done", sep = ""))} # print session information sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.